library(pacman)

p_load(here, tidyverse, zoo, lubridate, ggplot2, plotly, gghighlight, sf, tigris, ggrepel, viridis)

# Strongly bias number printing towards fixed (non-scientific) notation.
options(scipen = 1e6)

# AOR code this report is built for, read from the `aor` entry of `params`.
specific_aor <- params$aor

# Theme layer for static figures; currently an empty `theme()` with no
# overrides.
static_theme <- function() theme()

# Theme layer that sets the plot title and the legend title to size-10 text.
interactive_theme <- function() {
  small_text <- element_text(size = 10)
  theme(
    plot.title = small_text,
    legend.title = small_text
  )
}

ICE ERO enforcement data: SPM AOR

# ENCOUNTERS DATA

# Read the pipe-delimited encounters file. Types are pinned for the listed
# columns; `event_date` is read as text here and parsed in the cleaning step.
enc <- read_delim(
  file = here("analyze", "input", "ice_encounters_fy12-23ytd.csv.gz"),
  delim = "|",
  col_types = cols(
    aor = col_factor(),
    event_date = col_character(),
    landmark = col_character(),
    operation = col_factor(),
    processing_disposition = col_factor(),
    citizenship_country = col_factor(),
    gender = col_factor(),
    hashid = col_character(),
    id = col_number()
  )
)

# glimpse(enc)

# Columns dropped from the encounters table before analysis.
redacted <- c('encounter_threat_level', 'alien_file_number')

# `all_of()` marks `redacted` as an external character vector of column names.
# Passing the bare vector to `select()` is deprecated in tidyselect and would
# silently pick a column instead if one were ever named `redacted`.
enc <- enc %>%
  dplyr::select(-all_of(redacted))

# Clean the encounters table:
#   * sort the AOR factor levels alphabetically,
#   * parse `event_date` (month/day/year text) and derive calendar and
#     fiscal-year fields (fiscal year starts in October),
#   * upper-case the categorical text fields and squish whitespace in
#     `landmark`,
#   * keep only events dated 2011-10-01 through 2022-09-30.
enc <- enc %>%
  mutate(
    aor = factor(aor, levels = sort(levels(aor))),
    event_date = as_date(event_date, format = "%m/%d/%Y"),
    year = year(event_date),
    month = month(event_date, label = TRUE, abbr = TRUE),
    year_mth = zoo::as.yearmon(event_date),
    # "year.quarter" yields values such as 2012.1; the first four characters
    # are the fiscal year.
    fy_quarter = as.factor(
      quarter(event_date, fiscal_start = 10, type = "year.quarter")
    ),
    fy = as.factor(substr(fy_quarter, 1, 4))
  ) %>%
  mutate(
    gender = as.factor(toupper(gender)),
    operation = toupper(operation),
    processing_disposition = toupper(processing_disposition),
    citizenship_country = toupper(citizenship_country),
    landmark = toupper(str_squish(landmark))
  ) %>%
  filter(
    event_date >= as.Date("2011-10-01"),
    event_date <= as.Date("2022-09-30")
  )

# ARRESTS DATA

# Read the pipe-delimited arrests file. Date columns are parsed on read from
# month/day/year text; the listed categorical columns are read as factors.
arr <- read_delim(
  file = here("analyze", "input", "ice_arrests_fy12-23ytd.csv.gz"),
  delim = "|",
  col_types = cols(
    aor = col_factor(),
    arrest_date = col_date(format = "%m/%d/%Y"),
    departed_date = col_date(format = "%m/%d/%Y"),
    apprehension_landmark = col_factor(),
    arrest_method = col_factor(),
    operation = col_factor(),
    processing_disposition = col_factor(),
    citizenship_country = col_factor(),
    gender = col_factor(),
    case_closed_date = col_date(format = "%m/%d/%Y"),
    id = col_integer(),
    hashid = col_character()
  )
)

# Columns dropped from the arrests table before analysis.
redacted <- c(
  "removal_threat_level",
  "apprehension_threat_level",
  "alien_file_number"
)

arr <- arr %>%
  dplyr::select(-all_of(redacted))

# Clean the arrests table: sort AOR levels, derive calendar and fiscal-year
# fields from `arrest_date`, upper-case categorical text, and keep arrests
# dated 2011-10-01 through 2022-09-30.
arr <- arr %>%
  mutate(
    aor = factor(aor, levels = sort(levels(aor))),
    # `arrest_date` was already parsed as a date when the file was read.
    arrest_date = as_date(arrest_date, format = "%m/%d/%Y"),
    year = year(arrest_date),
    month = month(arrest_date, label = TRUE, abbr = TRUE),
    year_mth = zoo::as.yearmon(arrest_date),
    fy_quarter = as.factor(
      quarter(arrest_date, fiscal_start = 10, type = "year.quarter")
    ),
    fy = as.factor(substr(fy_quarter, 1, 4))
  ) %>%
  mutate(
    gender = as.factor(toupper(gender)),
    citizenship_country = as.factor(toupper(citizenship_country)),
    apprehension_landmark = toupper(str_squish(apprehension_landmark))
  ) %>%
  filter(
    arrest_date >= as.Date("2011-10-01"),
    arrest_date <= as.Date("2022-09-30")
  )

# REMOVALS DATA

# Hand-maintained lookup table for processing dispositions (joined to the
# removals data after cleaning).
pd_dict <- read_delim(
  file = here("share", "hand", "processing_disp.csv"),
  delim = "|"
)

# Read the pipe-delimited removals file with dates parsed on read from
# month/day/year text.
rem <- read_delim(
  file = here("analyze", "input", "ice_removals_fy12-23ytd.csv.gz"),
  delim = "|",
  col_types = cols(
    aor = col_factor(),
    arrest_date = col_date(format = "%m/%d/%Y"),
    departed_date = col_date(format = "%m/%d/%Y"),
    case_close_date = col_date(format = "%m/%d/%Y"),
    removal_date = col_date(format = "%m/%d/%Y"),
    apprehension_method_code = col_character(),
    processing_disposition_code = col_factor(),
    citizenship_country = col_factor(),
    gender = col_factor(),
    final_charge_section = col_factor(),
    id = col_integer(),
    hashid = col_character()
  )
)

# Columns dropped from the removals table before analysis.
redacted <- c('removal_threat_level', 'alien_file_number')

# `all_of()` marks `redacted` as an external character vector of column names;
# passing the bare vector to `select()` is deprecated in tidyselect.
# NOTE(review): the read step types a column named `case_close_date`, while
# this drops `case_closed_date` -- confirm which spelling the file uses.
rem <- rem %>%
  dplyr::select(-all_of(redacted), -case_closed_date)

# Clean the removals table. All time fields are derived from `departed_date`;
# rows are kept when that date falls in 2011-10-01 through 2022-09-30.
rem <- rem %>%
  mutate(
    aor = factor(aor, levels = sort(levels(aor))),
    year = year(departed_date),
    month = month(departed_date, label = TRUE, abbr = TRUE),
    year_mth = zoo::as.yearmon(departed_date),
    fy_quarter = as.factor(
      quarter(departed_date, fiscal_start = 10, type = "year.quarter")
    ),
    fy = as.factor(substr(fy_quarter, 1, 4)),
    # Take the disposition code where present, otherwise the disposition text.
    processing_disp = toupper(
      coalesce(processing_disposition_code, processing_disposition)
    ),
    gender = as.factor(toupper(gender)),
    citizenship_country = toupper(citizenship_country)
  ) %>%
  filter(
    departed_date >= as.Date("2011-10-01"),
    departed_date <= as.Date("2022-09-30")
  )

# Attach the lookup-table columns by matching the upper-cased raw disposition.
rem <- rem %>%
  left_join(pd_dict, by = c("processing_disp" = "processing_disposition_raw"))

# SUPPLEMENTAL DATA

# AOR-level demographic indicators, ordered by AOR and year; `year` becomes a
# factor so it can be joined against the factor `fy` columns.
demog <- here("analyze", "input", "aor_demog_indicators.csv.gz") %>%
  read_delim(delim = "|") %>%
  arrange(aor, year) %>%
  mutate(year = as.factor(year))

# Per-capita rates are expressed per this many residents.
pc_scale <- 100000

# Should get this from an external constants file along with other AOR characteristics
#
# Full AOR name(s) recorded in the arrests data for the selected AOR code,
# as a single string used in plot titles and group labels.
# The column is extracted with `pull()` before converting: `as.character()` on
# a data frame deparses the whole column, which yields an integer code for a
# factor column and `c("...")` text when more than one name is present.
# Multiple distinct names are joined with "; " so the result is always one
# string.
specific_area_of_responsibility <- arr %>%
  filter(aor == specific_aor) %>%
  distinct(area_of_responsibility) %>%
  pull(area_of_responsibility) %>%
  as.character() %>%
  paste(collapse = "; ")

This notebook provides a basic descriptive overview of ICE ERO-LESA enforcement data for the St. Paul Area of Responsibility (SPM).

Total enforcement actions by FY

# Event counts per fiscal year for the selected AOR, one table per
# enforcement type.
enc_fy <- enc %>%
  filter(aor == specific_aor) %>%
  count(fy, name = "n_encounters")

arr_fy <- arr %>%
  filter(aor == specific_aor) %>%
  count(fy, name = "n_arrests")

rem_fy <- rem %>%
  filter(aor == specific_aor) %>%
  count(fy, name = "n_removals")

# One wide table keyed by fiscal year; the encounter years define the rows.
dat_aor <- enc_fy %>%
  left_join(arr_fy, by = "fy") %>%
  left_join(rem_fy, by = "fy")

# Line chart of yearly encounters, arrests and removals for the selected AOR.
# The title embeds HTML (<br>, <sup>) so the AOR name shows as a sub-heading
# once the plot is converted with ggplotly().
p1 <- dat_aor %>%
  pivot_longer(cols = -c("fy")) %>%
  ggplot(aes(x = fy, y = value, color = name, group = name)) +
  geom_line() +
  ylim(0, NA) +
  labs(
    title = paste0(
      "Total ICE enforcement events per FY<br><sup>",
      specific_area_of_responsibility,
      "</sup>"
    ),
    caption = "Figure: Univ. of Wash. Center for Human Rights, Data: ICE ERO-LESA",
    x = "Fiscal Year",
    y = ""
  ) +
  guides(color = guide_legend(title = "Enforcement type"))

ggplotly(p1)

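The following chart displays basic comparative measures for the St. Paul Area of Responsibility: the difference between total encounters and total arrests (`diff_enc_arr`, encounters minus arrests); and the difference between total removals and total arrests (`diff_rem_arr`, removals minus arrests). Values greater than zero indicate more encounters (or removals) than arrests in that fiscal year; values below zero indicate more arrests.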

# Yearly gaps relative to arrests: encounters minus arrests, and removals
# minus arrests.
dat_diff <- mutate(
  dat_aor,
  diff_enc_arr = n_encounters - n_arrests,
  diff_rem_arr = n_removals - n_arrests
)

# Plot only the two difference series, with a dashed reference line at zero.
p2 <- dat_diff %>%
  pivot_longer(cols = -c("fy")) %>%
  filter(name %in% c("diff_enc_arr", "diff_rem_arr")) %>%
  ggplot(aes(x = fy, y = value, color = name, group = name)) +
  geom_line() +
  geom_hline(
    aes(yintercept = 0),
    linetype = "dashed",
    linewidth = .25
  ) +
  # annotate("text", x = 2, y = 0, label = "Fewer arrests\nMore arrests") +
  labs(
    title = paste0(
      "Difference of enforcement event totals per FY<br><sup>",
      specific_area_of_responsibility,
      "</sup>"
    ),
    x = "Fiscal Year",
    y = ""
  )

ggplotly(p2)

Here we compare trends for the St. Paul Area of Responsibility with national trends:

# National event counts per fiscal year (all AORs), one table per
# enforcement type.
enc_fy_natl <- enc %>%
  filter(event_date <= as.Date("2022-09-30")) %>%
  count(fy, name = "n_encounters")

arr_fy_natl <- arr %>%
  filter(arrest_date <= as.Date("2022-09-30")) %>%
  count(fy, name = "n_arrests")

rem_fy_natl <- rem %>%
  filter(departed_date <= as.Date("2022-09-30")) %>%
  count(fy, name = "n_removals")

dat_natl <- enc_fy_natl %>%
  left_join(arr_fy_natl, by = "fy") %>%
  left_join(rem_fy_natl, by = "fy")

# Label each table, then stack the AOR rows on top of the national rows.
dat_aor <- mutate(dat_aor, group = specific_area_of_responsibility)
dat_natl <- mutate(dat_natl, group = "National total")

dat <- rbind(dat_aor, dat_natl)

# AOR and national totals side by side, each facet with its own y-axis range.
# `text = group` is mapped only so the group name can appear in the plotly
# tooltip.
p1 <- dat %>%
  rename(
    encounters = n_encounters,
    arrests = n_arrests,
    removals = n_removals
  ) %>%
  pivot_longer(cols = -c("fy", "group")) %>%
  ggplot(aes(x = fy, y = value, color = name, group = name, text = group)) +
  geom_line() +
  labs(
    title = "Total ICE enforcement events, FY2012-22",
    x = "Fiscal Year",
    y = ""
  ) +
  scale_x_discrete(breaks = as.character(seq(2013, 2024, by = 4))) +
  ylim(0, NA) +
  scale_color_discrete(name = "Enforcement type") +
  facet_wrap(~group, scales = "free_y")

ggplotly(p1, tooltip = c("x", "y", "color", "text"))

Percent change

Annual percent change in enforcement actions per FY, St. Paul Area of Responsibility compared to national.

# Year-over-year proportional change for every count column, computed within
# each group (AOR vs national) in row order, then reshaped to long form.
dat_pct <- dat %>%
  group_by(group) %>%
  mutate(across(starts_with("n_"), function(x) x / lag(x) - 1)) %>%
  pivot_longer(cols = -c(fy, group))

# Dodged bar chart of annual percent change by enforcement type, faceted by
# group. The first year of each group has no prior year to compare against
# and is dropped as NA.
p3 <- dat_pct %>%
  filter(!is.na(value)) %>%
  ggplot(aes(x = fy, y = value, fill = name, group = name)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = scales::percent) +
  # `fy` is a discrete (factor) axis, so the breaks are given as character
  # values, matching the totals chart earlier in this notebook.
  scale_x_discrete(breaks = as.character(seq(2013, 2024, 4))) +
  facet_wrap(~group) +
  labs(title = "Annual % change total ICE enforcement actions, FY12-22")

ggplotly(p3)

Encounters

Encounters per capita

# Encounter counts per fiscal year and AOR, excluding rows with a missing AOR
# and the "HQ" AOR.
enc_per_aor <- enc %>%
  filter(
    !is.na(aor),
    aor != "HQ",
    event_date <= as.Date("2022-09-30")
  ) %>%
  group_by(fy, aor) %>%
  summarise(n = n())

# Attach population figures, carry them down within each AOR where a year has
# none, compute rates per `pc_scale` people, and rank AORs within each fiscal
# year by the per-capita rate (1 = highest).
enc_pc_per_aor <- enc_per_aor %>%
  left_join(demog, by = c("fy" = "year", "aor" = "aor")) %>%
  group_by(aor) %>%
  fill(contains("pop")) %>%
  mutate(
    n_per_cap = (n / total_pop) * pc_scale,
    n_per_undocu = (n / undocu_pop) * pc_scale
  ) %>%
  arrange(fy, desc(n_per_cap)) %>%
  group_by(fy) %>%
  mutate(pc_rank = row_number())

# Per-capita encounter rank of the selected AOR in FY2012 and FY2022.
# The rank is pulled out as a vector before converting: calling `as.numeric()`
# on a data-frame subset errors unless exactly one row matches. With
# `dplyr::first()` a missing AOR/year gives NA instead of an error.
# `fy` is compared against the year as text because it is a discrete label.
pc_encounter_rank_fy12 <- enc_pc_per_aor %>%
  ungroup() %>%
  filter(aor == specific_aor, fy == "2012") %>%
  pull(pc_rank) %>%
  dplyr::first() %>%
  as.numeric()

pc_encounter_rank_fy22 <- enc_pc_per_aor %>%
  ungroup() %>%
  filter(aor == specific_aor, fy == "2022") %>%
  pull(pc_rank) %>%
  dplyr::first() %>%
  as.numeric()

# Per-capita encounter rate for every AOR, with the selected AOR highlighted
# and the direct label switched off.
p1 <- ggplot(
  data = enc_pc_per_aor,
  mapping = aes(x = fy, y = n_per_cap, color = aor, group = aor)
) +
  geom_line() +
  gghighlight(aor == specific_aor, use_direct_label = FALSE) +
  labs(
    title = "ICE encounters per 100,000 residents",
    subtitle = paste0(specific_area_of_responsibility, " highlighted"),
    x = "Fiscal Year",
    y = "Encounters per capita"
  )

p1

Encounters by gender

# Share of encounters by gender per fiscal year for the selected AOR;
# position = "fill" rescales each bar to 100%.
p1 <- enc %>%
  filter(aor == specific_aor) %>%
  count(fy, gender) %>%
  ggplot(aes(x = fy, y = n, fill = gender)) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "ICE encounters, % by gender",
    subtitle = specific_area_of_responsibility,
    x = "Fiscal Year",
    y = ""
  )

p1

Encounters by citizenship_country

# Countries of citizenship for the selected AOR, most frequent first.
cit <- enc %>%
  filter(aor == specific_aor) %>%
  mutate(citizenship_country = toupper(citizenship_country)) %>%
  count(citizenship_country, sort = TRUE)

# Stacked yearly counts; countries outside the 15 most frequent are pooled
# into "ALL OTHERS".
p1 <- enc %>%
  filter(aor == specific_aor) %>%
  mutate(
    citizenship_country = case_when(
      citizenship_country %in% head(cit$citizenship_country, 15) ~ citizenship_country,
      TRUE ~ "ALL OTHERS"
    )
  ) %>%
  count(fy, citizenship_country) %>%
  ggplot(aes(
    x = fy,
    y = n,
    fill = citizenship_country,
    color = citizenship_country
  )) +
  geom_col() +
  labs(
    title = paste0(
      "ICE encounters by country of citizenship (top 15)<br><sup>",
      specific_area_of_responsibility,
      "</sup>"
    )
  )

ggplotly(p1)

Encounter landmark

# Encounter landmarks for the selected AOR, most frequent first.
landmarks <- enc %>%
  filter(aor == specific_aor) %>%
  count(landmark, sort = TRUE)

# Stacked yearly counts by landmark. Landmarks outside the 15 most frequent
# are pooled; a 20-character truncated label drives the legend while the full
# name is kept for the plotly tooltip.
p1 <- enc %>%
  filter(aor == specific_aor) %>%
  mutate(
    landmark = case_when(
      landmark %in% head(landmarks$landmark, 15) ~ as.character(landmark),
      TRUE ~ "ALL OTHERS"
    ),
    landmark_abbrv = str_trunc(landmark, 20)
  ) %>%
  group_by(fy, landmark_abbrv, landmark) %>%
  summarise(n = n()) %>%
  ggplot(aes(
    x = fy,
    y = n,
    fill = landmark_abbrv,
    color = landmark_abbrv,
    text = landmark
  )) +
  geom_col() +
  labs(
    title = paste0(
      "Total ICE encounters per FY by `landmark` (top 15)<br><sup>",
      specific_area_of_responsibility,
      "</sup>"
    )
  )

ggplotly(p1, tooltip = c("x", "y", "text"))

Encounters by operation

# Operations recorded for the selected AOR's encounters, most frequent first.
ops <- enc %>%
  filter(aor == specific_aor) %>%
  count(operation, sort = TRUE)

# Stacked yearly counts; operations outside the 10 most frequent are pooled.
p1 <- enc %>%
  filter(aor == specific_aor) %>%
  mutate(
    operation_short = case_when(
      operation %in% head(ops$operation, 10) ~ as.character(operation),
      TRUE ~ "ALL OTHERS"
    )
  ) %>%
  group_by(fy, operation_short) %>%
  summarise(n = n()) %>%
  ggplot(aes(x = fy, y = n, fill = operation_short)) +
  geom_col() +
  labs(title = "Total ICE encounters per FY by operation")

ggplotly(p1)

Encounters by processing_disposition

# Number of processing dispositions shown individually; the rest are pooled
# into "ALL OTHERS". The same value feeds the plot title so the label cannot
# drift from the cut-off (the title previously said "top 15" while the code
# kept 10).
disp_categories <- 10

# Processing dispositions for the selected AOR, most frequent first.
aor_disps <- enc %>%
  filter(aor == specific_aor) %>%
  count(processing_disposition) %>%
  arrange(desc(n))

# Stacked yearly encounter counts by (pooled) processing disposition.
p1 <- enc %>%
  filter(aor == specific_aor) %>%
  mutate(disp_short = case_when(
    processing_disposition %in% head(aor_disps$processing_disposition, disp_categories) ~
      as.character(processing_disposition),
    TRUE ~ "ALL OTHERS"
  )) %>%
  group_by(fy, disp_short) %>%
  summarize(n = n()) %>%
  ggplot(aes(x = fy, y = n, fill = disp_short, color = disp_short)) +
  geom_col() +
  labs(title = paste0("Total ICE encounters per FY by `processing disposition` (top ",
                      disp_categories,
                      ")",
                      '<br>',
                      '<sup>',
                      specific_area_of_responsibility,
                      '</sup>'))

ggplotly(p1, dynamicTicks = TRUE)

Arrests

Arrests per capita

# Arrest counts per fiscal year and AOR, excluding rows with a missing AOR and
# the "HQ" AOR.
arr_per_aor <- arr %>%
  filter(
    !is.na(aor),
    aor != "HQ",
    arrest_date <= as.Date("2022-09-30")
  ) %>%
  group_by(fy, aor) %>%
  summarise(n = n())

# Attach population figures, carry them down within each AOR where a year has
# none, compute rates per `pc_scale` people, and rank AORs within each fiscal
# year by the per-capita rate (1 = highest).
arr_pc_per_aor <- arr_per_aor %>%
  left_join(demog, by = c("fy" = "year", "aor" = "aor")) %>%
  group_by(aor) %>%
  fill(contains("pop")) %>%
  mutate(
    n_per_cap = (n / total_pop) * pc_scale,
    n_per_undocu = (n / undocu_pop) * pc_scale
  ) %>%
  arrange(fy, desc(n_per_cap)) %>%
  group_by(fy) %>%
  mutate(pc_rank = row_number())

# Per-capita arrest rate for every AOR, with the selected AOR highlighted and
# the direct label switched off.
p1 <- ggplot(
  data = arr_pc_per_aor,
  mapping = aes(x = fy, y = n_per_cap, color = aor, group = aor)
) +
  geom_line() +
  gghighlight(aor == specific_aor, use_direct_label = FALSE) +
  labs(
    title = "ICE arrests per 100,000 residents",
    subtitle = paste0(specific_area_of_responsibility, " highlighted")
  )

p1

# p2 <- arr_pc_per_aor %>% 
#   ggplot(aes(x = fy, y=pc_rank, color=aor, group=aor)) +
#   geom_line() +
#   gghighlight(aor == specific_aor, use_direct_label = FALSE) +
#   scale_y_reverse() +
#   labs(title = "ICE arrests per 100,000 residents, AOR rank",
#        subtitle = paste0(specific_area_of_responsibility, " highlighted"))
# 
# p2

# Per-capita arrest rate for the selected AOR alone, with the y-axis anchored
# at zero.
p3 <- arr_pc_per_aor %>%
  filter(aor == specific_aor) %>%
  ggplot(mapping = aes(x = fy, y = n_per_cap, color = aor, group = aor)) +
  geom_line() +
  ylim(0, NA) +
  labs(
    title = "ICE arrests per 100,000 residents",
    subtitle = paste0(specific_area_of_responsibility)
  )

p3

Arrests by gender

# Share of arrests by gender per fiscal year for the selected AOR;
# position = "fill" rescales each bar to 100%.
p1 <- arr %>%
  filter(
    aor == specific_aor,
    arrest_date >= as.Date("2011-10-01"),
    arrest_date <= as.Date("2022-09-30")
  ) %>%
  mutate(gender = as.factor(toupper(gender))) %>%
  count(fy, gender) %>%
  ggplot(aes(x = fy, y = n, fill = gender)) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Total ICE arrests, % by gender",
    subtitle = specific_area_of_responsibility
  )

p1

Arrests by citizenship_country

# Countries of citizenship among the selected AOR's arrests, most frequent
# first.
cit <- arr %>%
  filter(
    aor == specific_aor,
    arrest_date >= as.Date("2011-10-01"),
    arrest_date <= as.Date("2022-09-30")
  ) %>%
  mutate(citizenship_country = toupper(citizenship_country)) %>%
  count(citizenship_country, sort = TRUE)

# How many countries are shown individually in the chart below.
cit_categories <- 10

# Stacked yearly arrest counts by country of citizenship for the selected AOR.
# Countries outside the `cit_categories` most frequent (per `cit`) are pooled
# into "ALL OTHERS".
p1 <- arr %>%
  filter(aor == specific_aor,
         arrest_date <= "2022-09-30") %>%
  mutate(citizenship_country = case_when(
    # `citizenship_country` is a factor in `arr`; convert it so both
    # case_when() branches are character, as the other top-N recodes in this
    # notebook do. Mixing factor and character outcomes is rejected by older
    # dplyr versions.
    citizenship_country %in% head(cit$citizenship_country, cit_categories) ~
      as.character(citizenship_country),
    TRUE ~ "ALL OTHERS"
  )) %>%
  count(fy, citizenship_country) %>%
  ggplot(aes(x = fy, y = n, fill = citizenship_country)) +
  geom_col() +
  labs(title = paste0("Total ICE arrests by country of citizenship (top ", cit_categories, ")"),
       subtitle = specific_area_of_responsibility)

p1

# Rank countries of citizenship within each fiscal year by arrest count
# (1 = most arrests); ties are broken by country order.
cit_rank <- arr %>%
  filter(
    aor == specific_aor,
    arrest_date >= as.Date("2011-10-01"),
    arrest_date <= as.Date("2022-09-30")
  ) %>%
  count(fy, citizenship_country) %>%
  arrange(fy, desc(n), citizenship_country) %>%
  group_by(fy) %>%
  mutate(ranking = row_number())

# Bump-style chart of the ten highest-ranked countries per fiscal year; the
# y-axis is reversed so rank 1 is at the top.
p1 <- cit_rank %>%
  filter(ranking <= 10) %>%
  ggplot(aes(x = fy, y = ranking, color = citizenship_country, group = citizenship_country)) +
  # Line thickness is set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0, and the rest of this notebook already uses
  # `linewidth`.
  geom_line(alpha = .7, linewidth = 1) +
  geom_point(alpha = .7, size = 2) +
  scale_y_reverse() +
  labs(title = "Ranked country of citizenship for ICE arrests",
       subtitle = specific_area_of_responsibility)

ggplotly(p1)

Arrests by arrest_method

# Arrest methods used in the selected AOR, most frequent first.
methods <- arr %>%
  filter(
    aor == specific_aor,
    arrest_date >= as.Date("2011-10-01"),
    arrest_date <= as.Date("2022-09-30")
  ) %>%
  count(arrest_method, sort = TRUE)

# Methods with more than 100 arrests in the selected AOR keep their own label.
top_methods <- filter(methods, n > 100)

# The recode is applied to the full arrests table (every AOR), using the
# selected AOR's top methods; everything else becomes "All others".
arr <- arr %>%
  mutate(
    arrest_method_short = case_when(
      arrest_method %in% top_methods$arrest_method ~ as.character(arrest_method),
      TRUE ~ "All others"
    )
  )

# Stacked yearly arrest counts by (pooled) arrest method for the selected AOR;
# geom_bar() does the counting.
p1 <- arr %>%
  filter(
    aor == specific_aor,
    arrest_date >= as.Date("2011-10-01"),
    arrest_date <= as.Date("2022-09-30")
  ) %>%
  group_by(fy, arrest_method_short) %>%
  ggplot(mapping = aes(x = fy, fill = arrest_method_short)) +
  geom_bar(stat = "count", position = "stack")

ggplotly(p1)
# Yearly counts of the three CAP incarceration arrest methods in the selected
# AOR.
dat <- arr %>%
  filter(
    aor == specific_aor,
    arrest_method_short %in% c(
      "CAP Local Incarceration",
      "CAP State Incarceration",
      "CAP Federal Incarceration"
    ),
    arrest_date >= as.Date("2011-10-01"),
    arrest_date <= as.Date("2022-09-30")
  ) %>%
  count(arrest_method_short, fy)

p2 <- ggplot(dat, aes(x = fy, y = n, fill = arrest_method_short)) +
  geom_col(position = "dodge") +
  labs(
    title = "Total CAP arrests, FY12-22",
    subtitle = specific_area_of_responsibility
  )

p2

# Yearly counts of the three CAP incarceration arrest methods across all AORs.
dat <- arr %>%
  filter(
    arrest_method_short %in% c(
      "CAP Local Incarceration",
      "CAP State Incarceration",
      "CAP Federal Incarceration"
    ),
    arrest_date >= as.Date("2011-10-01"),
    arrest_date <= as.Date("2022-09-30")
  ) %>%
  count(arrest_method_short, fy)

p3 <- ggplot(dat, aes(x = fy, y = n, fill = arrest_method_short)) +
  geom_col(position = "dodge") +
  labs(
    title = "Total CAP arrests, FY12-22",
    subtitle = "National"
  )

p3

# Yearly arrest counts by (pooled) method: all AORs combined ...
dat1 <- arr %>%
  filter(
    arrest_date >= as.Date("2011-10-01"),
    arrest_date <= as.Date("2022-09-30")
  ) %>%
  count(fy, arrest_method_short) %>%
  mutate(group = "National")

# ... and the selected AOR only.
dat2 <- arr %>%
  filter(
    aor == specific_aor,
    arrest_date >= as.Date("2011-10-01"),
    arrest_date <= as.Date("2022-09-30")
  ) %>%
  count(fy, arrest_method_short) %>%
  mutate(group = specific_area_of_responsibility)

# National rows first, then the AOR rows.
dat <- rbind(dat1, dat2)

# Arrest-method trend lines, drawn as two separate figures: national first,
# then the selected AOR. Each is faceted on `group` so the panel strip carries
# the group name.
p1 <- dat %>%
  filter(group == "National") %>%
  ggplot(mapping = aes(
    x = fy,
    y = n,
    color = arrest_method_short,
    group = arrest_method_short
  )) +
  geom_line() +
  facet_wrap(~group, scales = "free_y")

p1

p2 <- dat %>%
  filter(group == specific_area_of_responsibility) %>%
  ggplot(mapping = aes(
    x = fy,
    y = n,
    color = arrest_method_short,
    group = arrest_method_short
  )) +
  geom_line() +
  facet_wrap(~group, scales = "free_y")

p2

Arrests by processing_disposition

# Processing dispositions among the selected AOR's arrests, most frequent
# first.
disps <- arr %>%
  filter(aor == specific_aor) %>%
  count(processing_disposition, sort = TRUE)

# The recode is applied to the full arrests table: dispositions outside the
# selected AOR's 10 most frequent become "ALL OTHERS".
arr <- arr %>%
  mutate(
    disp_short = case_when(
      processing_disposition %in% head(disps$processing_disposition, 10) ~
        as.character(processing_disposition),
      TRUE ~ "ALL OTHERS"
    )
  )

# Stacked yearly arrest counts by (pooled) disposition for the selected AOR.
p1 <- arr %>%
  filter(aor == specific_aor) %>%
  group_by(fy, disp_short) %>%
  summarise(n = n()) %>%
  ggplot(aes(x = fy, y = n, fill = disp_short)) +
  geom_col() +
  labs(title = "Total ICE arrests per FY by processing disposition")

ggplotly(p1)

Arrests by apprehension_landmark

# Apprehension landmarks for the selected AOR's arrests, most frequent first.
landmarks <- arr %>%
  filter(aor == specific_aor) %>%
  count(apprehension_landmark, sort = TRUE)

# Abbreviating values in data can collapse some categories inadvertently.
# Need to figure out how to shorten labels only, preferably in a way that works with plotly

# Stacked yearly arrest counts by landmark. Landmarks outside the 15 most
# frequent are pooled; the 20-character truncated label drives the legend and
# the full name is kept for the tooltip.
p1 <- arr %>%
  filter(aor == specific_aor) %>%
  mutate(
    apprehension_landmark = case_when(
      apprehension_landmark %in% head(landmarks$apprehension_landmark, 15) ~
        as.character(apprehension_landmark),
      TRUE ~ "ALL OTHERS"
    ),
    apprehension_landmark_abbrv = str_trunc(apprehension_landmark, 20)
  ) %>%
  group_by(fy, apprehension_landmark_abbrv, apprehension_landmark) %>%
  summarise(n = n()) %>%
  ggplot(aes(
    x = fy,
    y = n,
    fill = apprehension_landmark_abbrv,
    color = apprehension_landmark_abbrv,
    text = apprehension_landmark
  )) +
  geom_col() +
  labs(
    title = "Total ICE arrests per FY by `apprehension_landmark` (top 15)",
    subtitle = specific_area_of_responsibility
  )

ggplotly(p1, tooltip = c("x", "y", "text"))

Removals

Removals per capita

# Removal counts per fiscal year and AOR, excluding rows with a missing AOR
# and the "HQ" AOR.
rem_per_aor <- rem %>%
  filter(
    !is.na(aor),
    aor != "HQ",
    departed_date >= as.Date("2011-10-01"),
    departed_date <= as.Date("2022-09-30")
  ) %>%
  group_by(fy, aor) %>%
  summarise(n = n())

# Attach population figures, carry them down within each AOR where a year has
# none, compute rates per `pc_scale` people, and rank AORs within each fiscal
# year by the per-capita rate (1 = highest).
rem_pc_per_aor <- rem_per_aor %>%
  left_join(demog, by = c("fy" = "year", "aor" = "aor")) %>%
  group_by(aor) %>%
  fill(contains("pop")) %>%
  mutate(
    n_per_cap = (n / total_pop) * pc_scale,
    n_per_undocu = (n / undocu_pop) * pc_scale
  ) %>%
  arrange(fy, desc(n_per_cap)) %>%
  group_by(fy) %>%
  mutate(pc_rank = row_number())

# Per-capita removal rank of the selected AOR in FY2012 and FY2022.
# The rank is pulled out as a vector before converting: calling `as.numeric()`
# on a data-frame subset errors unless exactly one row matches. With
# `dplyr::first()` a missing AOR/year gives NA instead of an error.
# `fy` is compared against the year as text because it is a discrete label.
pc_removals_rank_fy12 <- rem_pc_per_aor %>%
  ungroup() %>%
  filter(aor == specific_aor, fy == "2012") %>%
  pull(pc_rank) %>%
  dplyr::first() %>%
  as.numeric()

pc_removals_rank_fy22 <- rem_pc_per_aor %>%
  ungroup() %>%
  filter(aor == specific_aor, fy == "2022") %>%
  pull(pc_rank) %>%
  dplyr::first() %>%
  as.numeric()

# Per-capita removal rate for every AOR with the selected AOR highlighted;
# gghighlight's labelling is left at its default here.
p1 <- ggplot(
  data = rem_pc_per_aor,
  mapping = aes(x = fy, y = n_per_cap, color = aor, group = aor)
) +
  geom_line() +
  gghighlight(aor == specific_aor) +
  labs(
    title = "ICE removals per 100,000 residents",
    subtitle = paste0(specific_area_of_responsibility, " highlighted")
  )

p1

Removals by gender

# rem %>%
#   mutate(gender = tolower(gender)) %>% 
#   group_by(gender) %>% 
#   summarize(n = n())

# Share of removals by gender per fiscal year for the selected AOR;
# position = "fill" rescales each bar to 100%.
p1 <- rem %>%
  filter(aor == specific_aor) %>%
  count(fy, gender) %>%
  ggplot(mapping = aes(x = fy, y = n, fill = gender)) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Total ICE removals, % by gender")

p1

Removals by citizenship_country

# Countries of citizenship among the selected AOR's removals, most frequent
# first.
cit <- rem %>%
  filter(aor == specific_aor) %>%
  mutate(citizenship_country = toupper(citizenship_country)) %>%
  count(citizenship_country, sort = TRUE)

# Stacked yearly removal counts; countries outside the 15 most frequent are
# pooled into "ALL OTHERS".
p1 <- rem %>%
  filter(aor == specific_aor) %>%
  mutate(
    citizenship_country = case_when(
      citizenship_country %in% head(cit$citizenship_country, 15) ~ citizenship_country,
      TRUE ~ "ALL OTHERS"
    )
  ) %>%
  count(fy, citizenship_country) %>%
  ggplot(aes(
    x = fy,
    y = n,
    fill = citizenship_country,
    color = citizenship_country
  )) +
  geom_col() +
  labs(title = "Total ICE removals by country of citizenship (top 15)")

ggplotly(p1)
# % change in removal by group?

Removals by processing_disposition

# Cleaned processing dispositions among the selected AOR's removals, most
# frequent first.
disps <- rem %>%
  filter(aor == specific_aor) %>%
  count(processing_disposition_clean, sort = TRUE)

# Stacked yearly removal counts; dispositions outside the 10 most frequent are
# pooled into "ALL OTHERS".
p1 <- rem %>%
  filter(aor == specific_aor) %>%
  mutate(
    disp_short = case_when(
      processing_disposition_clean %in% head(disps$processing_disposition_clean, 10) ~
        as.character(processing_disposition_clean),
      TRUE ~ "ALL OTHERS"
    )
  ) %>%
  group_by(fy, disp_short) %>%
  summarise(n = n()) %>%
  ggplot(aes(x = fy, y = n, fill = disp_short)) +
  geom_col() +
  labs(
    title = "Total removals per FY by processing disposition",
    subtitle = specific_area_of_responsibility,
    x = "Fiscal year",
    y = "Count"
  )

ggplotly(p1)